@article{mnih2015human,
  title={Human-level control through deep reinforcement learning},
  author={Mnih, Volodymyr and Kavukcuoglu, Koray and Silver, David and Rusu, Andrei A and Veness, Joel and Bellemare, Marc G and Graves, Alex and Riedmiller, Martin and Fidjeland, Andreas K and Ostrovski, Georg and others},
  journal={nature},
  volume={518},
  number={7540},
  pages={529--533},
  year={2015},
  publisher={Nature Publishing Group}
}
@article{watkins1992q,
  title={Q-learning},
  author={Watkins, Christopher JCH and Dayan, Peter},
  journal={Machine learning},
  volume={8},
  number={3-4},
  pages={279--292},
  year={1992},
  publisher={Springer}
}
@article{hausknecht2015deep,
  title={Deep recurrent q-learning for partially observable mdps},
  author={Hausknecht, Matthew and Stone, Peter},
  journal={arXiv preprint arXiv:1507.06527},
  year={2015}
}
@inproceedings{wang2016dueling,
  title={Dueling network architectures for deep reinforcement learning},
  author={Wang, Ziyu and Schaul, Tom and Hessel, Matteo and Hasselt, Hado and Lanctot, Marc and Freitas, Nando},
  booktitle={International conference on machine learning},
  pages={1995--2003},
  year={2016},
  organization={PMLR}
}
@article{van2015deep,
  title={Deep reinforcement learning with double q-learning},
  author={Van Hasselt, Hado and Guez, Arthur and Silver, David},
  journal={arXiv preprint arXiv:1509.06461},
  year={2015}
}
@article{hasselt2010double,
  title={Double Q-learning},
  author={Hasselt, Hado},
  journal={Advances in neural information processing systems},
  volume={23},
  pages={2613--2621},
  year={2010}
}
@article{van2015deep,
  title={Deep reinforcement learning with double q-learning},
  author={Van Hasselt, Hado and Guez, Arthur and Silver, David},
  journal={arXiv preprint arXiv:1509.06461},
  year={2015}
}
@article{schaul2015prioritized,
  title={Prioritized experience replay},
  author={Schaul, Tom and Quan, John and Antonoglou, Ioannis and Silver, David},
  journal={arXiv preprint arXiv:1511.05952},
  year={2015}
}
@article{dabney2017distributional,
  title={Distributional reinforcement learning with quantile regression},
  author={Dabney, Will and Rowland, Mark and Bellemare, Marc G and Munos, R{\'e}mi},
  journal={arXiv preprint arXiv:1710.10044},
  year={2017}
}
@article{dabney2018implicit,
  title={Implicit quantile networks for distributional reinforcement learning},
  author={Dabney, Will and Ostrovski, Georg and Silver, David and Munos, R{\'e}mi},
  journal={arXiv preprint arXiv:1806.06923},
  year={2018}
}
@article{bellemare2017distributional,
  title={A distributional perspective on reinforcement learning},
  author={Bellemare, Marc G and Dabney, Will and Munos, R{\'e}mi},
  journal={arXiv preprint arXiv:1707.06887},
  year={2017}
}
@article{hessel2017rainbow,
  title={Rainbow: Combining improvements in deep reinforcement learning},
  author={Hessel, Matteo and Modayil, Joseph and Van Hasselt, Hado and Schaul, Tom and Ostrovski, Georg and Dabney, Will and Horgan, Dan and Piot, Bilal and Azar, Mohammad and Silver, David},
  journal={arXiv preprint arXiv:1710.02298},
  year={2017}
}
@article{sutton1999policy,
  title={Policy gradient methods for reinforcement learning with function approximation},
  author={Sutton, Richard S and McAllester, David and Singh, Satinder and Mansour, Yishay},
  journal={Advances in neural information processing systems},
  volume={12},
  pages={1057--1063},
  year={1999}
}
@article{kakade2001natural,
  title={A natural policy gradient},
  author={Kakade, Sham M},
  journal={Advances in neural information processing systems},
  volume={14},
  pages={1531--1538},
  year={2001}
}
@inproceedings{mnih2016asynchronous,
  title={Asynchronous methods for deep reinforcement learning},
  author={Mnih, Volodymyr and Badia, Adria Puigdomenech and Mirza, Mehdi and Graves, Alex and Lillicrap, Timothy and Harley, Tim and Silver, David and Kavukcuoglu, Koray},
  booktitle={International conference on machine learning},
  pages={1928--1937},
  year={2016}
}
@inproceedings{schulman2015trust,
  title={Trust region policy optimization},
  author={Schulman, John and Levine, Sergey and Abbeel, Pieter and Jordan, Michael and Moritz, Philipp},
  booktitle={International conference on machine learning},
  pages={1889--1897},
  year={2015}
}
@article{schulman2017proximal,
  title={Proximal policy optimization algorithms},
  author={Schulman, John and Wolski, Filip and Dhariwal, Prafulla and Radford, Alec and Klimov, Oleg},
  journal={arXiv preprint arXiv:1707.06347},
  year={2017}
}